### UNITED Dataframe prediction

#### Configuration #####
library(dplyr)
library(randomForestSRC)
library(xgboost)
library(MLmetrics)

# Print large numbers in fixed notation rather than scientific notation.
options(scipen = 999)
# Every relative file name used below (input CSVs and written results) is
# resolved against this project directory.
setwd("E:/Fortis/Workspace/Views Competition/Update September 2020")
# Request a 32000 MB memory ceiling. memory.limit(size = ) is the call that
# sets the limit; memory.size() only reports usage and reads its argument as
# the logical `max` flag, so the former memory.size(32000) changed nothing.
# Windows-only; recent R versions no longer support it and only warn.
memory.limit(size = 32000)


###### Load Data #####
# Drop a `united.01` object left in the workspace (rm() warns if there is no
# such object).
rm(united.01)

# Read the united dataset; the first row of the CSV holds the column names.
views.ged <- read.csv("united_01.csv", header = TRUE)

# Keep only the rows that contain no missing value.
views.ged <- na.omit(views.ged)

# Backup: unscaled copy, used further down to undo the scaling step.
backup.united <- views.ged

## Load existing Ensembles to continue adding forecasts
# Task 1 ensemble files (steps s3 to s7).
Pred.Ensemble.Task1.s3 <- read.csv("Pred_Ensemble_Task1_s3.csv", header = TRUE)
Pred.Ensemble.Task1.s4 <- read.csv("Pred_Ensemble_Task1_s4.csv", header = TRUE)
Pred.Ensemble.Task1.s5 <- read.csv("Pred_Ensemble_Task1_s5.csv", header = TRUE)
Pred.Ensemble.Task1.s6 <- read.csv("Pred_Ensemble_Task1_s6.csv", header = TRUE)
Pred.Ensemble.Task1.s7 <- read.csv("Pred_Ensemble_Task1_s7.csv", header = TRUE)

# Task 2 ensemble files (steps s1 to s7).
Pred.Ensemble.Task2.s1 <- read.csv("Pred_Ensemble_Task2_s1.csv", header = TRUE)
Pred.Ensemble.Task2.s2 <- read.csv("Pred_Ensemble_Task2_s2.csv", header = TRUE)
Pred.Ensemble.Task2.s3 <- read.csv("Pred_Ensemble_Task2_s3.csv", header = TRUE)
Pred.Ensemble.Task2.s4 <- read.csv("Pred_Ensemble_Task2_s4.csv", header = TRUE)
Pred.Ensemble.Task2.s5 <- read.csv("Pred_Ensemble_Task2_s5.csv", header = TRUE)
Pred.Ensemble.Task2.s6 <- read.csv("Pred_Ensemble_Task2_s6.csv", header = TRUE)
Pred.Ensemble.Task2.s7 <- read.csv("Pred_Ensemble_Task2_s7.csv", header = TRUE)

# Task 3 ensemble files (steps s1 to s7).
Pred.Ensemble.Task3.s1 <- read.csv("Pred_Ensemble_Task3_s1.csv", header = TRUE)
Pred.Ensemble.Task3.s2 <- read.csv("Pred_Ensemble_Task3_s2.csv", header = TRUE)
Pred.Ensemble.Task3.s3 <- read.csv("Pred_Ensemble_Task3_s3.csv", header = TRUE)
Pred.Ensemble.Task3.s4 <- read.csv("Pred_Ensemble_Task3_s4.csv", header = TRUE)
Pred.Ensemble.Task3.s5 <- read.csv("Pred_Ensemble_Task3_s5.csv", header = TRUE)
Pred.Ensemble.Task3.s6 <- read.csv("Pred_Ensemble_Task3_s6.csv", header = TRUE)
Pred.Ensemble.Task3.s7 <- read.csv("Pred_Ensemble_Task3_s7.csv", header = TRUE)

# Stacking input for Task 2, step s3. Every read below assigns the same
# object, so each one replaces the result of the read before it and the file
# read last is the one left in Ensemble.Stacking.Task2.s3.
Ensemble.Stacking.Task2.s3 <- read.csv("Ensemble_Stacking_Task2_s3.csv", header = TRUE)
Ensemble.Stacking.Task2.s3 <- read.csv("Ensemble_Stacking_Task2_s3_V2.csv", header = TRUE)

#series2
Ensemble.Stacking.Task2.s3 <- read.csv("Ensemble_Stacking_Task2_s3_series2.csv", header = TRUE)

#series3
Ensemble.Stacking.Task2.s3 <- read.csv("Ensemble_Stacking_Task2_s3_series3.csv", header = TRUE)

## Change to scale=true after running RF versions to rerun in XGBoost ####
## not done in Series 2


# Scale: centre and scale columns 5 to 92 on a working copy, then make the
# scaled copy the active dataset.
views.xgb <- views.ged
views.xgb[, 5:92] <- scale(views.xgb[, 5:92])
views.ged <- views.xgb

# Reset to unscale: restore the active dataset from the backup copy.
# Running this right after the scaling step undoes it.
views.ged <- backup.united


## LEARN - PREDICT SETUP for 3 batches of forecasts: #####


### Task 2: January 2017 - Dec 2019 Forecasts (m_id 445-480) ####



# Test forecasts for sb conflict for each of the months January 2017 -
# December 2019: seven alternative learn/prediction splits (s-1 to s-7).
# Every split assigns the same two objects, so the split that was run last is
# the one in effect.

# s-1: Setting test set (for s-1 444-479 is prediction set)
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 443)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 444, month_id <= 479)

# s-2: Setting test set (for s-2 443-478 is prediction set)
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 442)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 443, month_id <= 478)


## Ensemble
# s-3: Setting test set
# NOTE(review): unlike the other steps, the prediction set spans the same
# months as the learning set (114-441), restricted to in_africa == 1 -
# presumably in-sample predictions for the stacking step; confirm.
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 114, month_id <= 441)
views.ged.pred.t2 <- views.ged %>%
  filter(in_africa == 1, month_id >= 114, month_id <= 441)



# s-4: Setting test set
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 440)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 441, month_id <= 476)

# s-5: Setting test set
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 439)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 440, month_id <= 475)

# s-6: Setting test set
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 438)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 439, month_id <= 474)

# s-7: Setting test set
views.ged.learn.t2 <- views.ged %>%
  filter(month_id >= 109, month_id <= 437)
views.ged.pred.t2 <- views.ged %>%
  filter(month_id >= 438, month_id <= 473)


#+#


### Task 3: January 2014 - Dec 2016 Forecasts (m_id 409-444) ####

# Seven alternative learn/prediction splits (s-1 to s-7). Each one assigns
# the same two objects; the learning window always starts at month 109 and
# ends the month before the 36-month prediction window begins.

# s-1: Setting test set (for s-1 408-443 is prediction set)
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 407)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 408, month_id <= 443)

# s-2: Setting test set (for s-2 407-442 is prediction set)
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 406)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 407, month_id <= 442)

# s-3: Setting test set
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 405)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 406, month_id <= 441)

# s-4: Setting test set
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 404)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 405, month_id <= 440)

# s-5: Setting test set
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 403)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 404, month_id <= 439)

# s-6: Setting test set
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 402)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 403, month_id <= 438)

# s-7: Setting test set
views.ged.learn.t3 <- views.ged %>%
  filter(month_id >= 109, month_id <= 401)
views.ged.pred.t3 <- views.ged %>%
  filter(month_id >= 402, month_id <= 437)


#+#

### Task 1: True forecasts Oct 2020 - March 2021 (m_id 490-495) ####
# Six alternative learn/prediction splits (s2 to s7). Each one assigns the
# same two objects, so the split that was run last is the one in effect.
# The prediction window always ends at month 488.

### Learn for prediction of October 2020 s2 (learn only till 486 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 109, month_id <= 486)
ged.pred.true <- views.ged %>%
  filter(month_id == 488)

### Learn for prediction of November 2020 s3 (learn only till 485 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 114, month_id <= 485)
ged.pred.true <- views.ged %>%
  filter(month_id >= 487, month_id <= 488)

### Learn for prediction of December 2020 s4 (learn only till 484 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 114, month_id <= 484)
ged.pred.true <- views.ged %>%
  filter(month_id >= 486, month_id <= 488)

### Learn for prediction of January 2021 s5 (learn only till 483 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 114, month_id <= 483)
ged.pred.true <- views.ged %>%
  filter(month_id >= 485, month_id <= 488)

### Learn for prediction of February 2021 s6 (learn only till 482 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 114, month_id <= 482)
ged.pred.true <- views.ged %>%
  filter(month_id >= 484, month_id <= 488)

### Learn for prediction of March 2021 s7 (learn only till 481 possible)
ged.learn.true <- views.ged %>%
  filter(month_id >= 114, month_id <= 481)
ged.pred.true <- views.ged %>%
  filter(month_id >= 483, month_id <= 488)


#+#+#+#+#+#+#
# RF Predictions #####
#+#+#+#+#+#+#

##### RF.01 prediction test ######
# Trial forest with 75 trees on columns 7:48 plus column 57 of
# views.ged.learn, imputing missing values.
# NOTE(review): views.ged.learn and views.ged.pred are not created in the
# part of the script visible here - presumably left over from an earlier
# setup; confirm before running.
rf.ged.01 <- rfsrc(
  ln_ged_best_sb_s2 ~ .,
  views.ged.learn[, c(7:48, 57)],
  ntree = 75,
  na.action = "na.impute"
)

# Show the fitted forest.
rf.ged.01

prediction.ged.01 <- predict(
  rf.ged.01,
  views.ged.pred,
  na.action = "na.impute"
)

# Show the prediction object.
prediction.ged.01
#+#+# #####

#### RF Task 2 - s1-7 Forecasts #####
# One forest of 350 trees per step. The predictors are columns 5:92 of the
# Task 2 learning set; the step's outcome column (93 for s1 up to 99 for s7)
# is appended as the response.
# NOTE(review): s3 is fitted with na.omit while the other steps impute -
# presumably a leftover of the NA-handling test further down; confirm.
rf.t2.united.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t2[, c(5:92, 93)],
                         ntree = 350, na.action = "na.impute")

rf.t2.united.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t2[, c(5:92, 94)],
                         ntree = 350, na.action = "na.impute")

rf.t2.united.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t2[, c(5:92, 95)],
                         ntree = 350, na.action = "na.omit")

rf.t2.united.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t2[, c(5:92, 96)],
                         ntree = 350, na.action = "na.impute")

rf.t2.united.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t2[, c(5:92, 97)],
                         ntree = 350, na.action = "na.impute")

rf.t2.united.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t2[, c(5:92, 98)],
                         ntree = 350, na.action = "na.impute")

rf.t2.united.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t2[, c(5:92, 99)],
                         ntree = 350, na.action = "na.impute")

# Inspect the fitted forests.
rf.t2.united.s1
rf.t2.united.s2
rf.t2.united.s3
rf.t2.united.s4
rf.t2.united.s5
rf.t2.united.s6
rf.t2.united.s7

## Predict Task 2
# Predictions must be made while the forests still exist, so this step now
# comes before the model cleanup (it used to come after it and failed with
# "object not found" when the section was run in order).
pr.t2.united.s1 <- predict(rf.t2.united.s1, views.ged.pred.t2, na.action = "na.impute")
pr.t2.united.s2 <- predict(rf.t2.united.s2, views.ged.pred.t2, na.action = "na.impute")
pr.t2.united.s3 <- predict(rf.t2.united.s3, views.ged.pred.t2, na.action = "na.omit")
pr.t2.united.s4 <- predict(rf.t2.united.s4, views.ged.pred.t2, na.action = "na.impute")
pr.t2.united.s5 <- predict(rf.t2.united.s5, views.ged.pred.t2, na.action = "na.impute")
pr.t2.united.s6 <- predict(rf.t2.united.s6, views.ged.pred.t2, na.action = "na.impute")
pr.t2.united.s7 <- predict(rf.t2.united.s7, views.ged.pred.t2, na.action = "na.omit")


# Inspect the prediction objects.
pr.t2.united.s1
pr.t2.united.s2
pr.t2.united.s3
pr.t2.united.s4
pr.t2.united.s5
pr.t2.united.s6
pr.t2.united.s7

# Cleanup: free the forests for s1-s6 now that their predictions exist.
rm(rf.t2.united.s1)
rm(rf.t2.united.s2)
rm(rf.t2.united.s3)
rm(rf.t2.united.s4)
rm(rf.t2.united.s5)
rm(rf.t2.united.s6)

# Cleanup: drop the prediction objects for s1-s6. The collection section
# further down reads pr.t2.united.sX, so skip the line for any step whose
# predictions have not been written to its ensemble file yet.
rm(pr.t2.united.s1)
rm(pr.t2.united.s2)
rm(pr.t2.united.s3)
rm(pr.t2.united.s4)
rm(pr.t2.united.s5)
rm(pr.t2.united.s6)

#### RF Task 3 - s1-7 Forecasts #####
# One forest of 350 trees per step. The predictors are columns 5:92 of the
# Task 3 learning set; the step's outcome column (93 for s1 up to 99 for s7)
# is appended as the response. Missing values are imputed.
rf.t3.united.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t3[, c(5:92, 93)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t3[, c(5:92, 94)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t3[, c(5:92, 95)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t3[, c(5:92, 96)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t3[, c(5:92, 97)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t3[, c(5:92, 98)],
                         ntree = 350, na.action = "na.impute")

rf.t3.united.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t3[, c(5:92, 99)],
                         ntree = 350, na.action = "na.impute")

# Inspect the fitted forests (before any of them is removed).
rf.t3.united.s1
rf.t3.united.s2
rf.t3.united.s3
rf.t3.united.s4
rf.t3.united.s5
rf.t3.united.s6
rf.t3.united.s7

## Predict Task 3
# Predictions are made while all forests still exist.
pr.t3.united.s1 <- predict(rf.t3.united.s1, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s2 <- predict(rf.t3.united.s2, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s3 <- predict(rf.t3.united.s3, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s4 <- predict(rf.t3.united.s4, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s5 <- predict(rf.t3.united.s5, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s6 <- predict(rf.t3.united.s6, views.ged.pred.t3, na.action = "na.impute")
pr.t3.united.s7 <- predict(rf.t3.united.s7, views.ged.pred.t3, na.action = "na.impute")

# Inspect the prediction objects (before any of them is removed).
pr.t3.united.s1
pr.t3.united.s2
pr.t3.united.s3
pr.t3.united.s4
pr.t3.united.s5
pr.t3.united.s6
pr.t3.united.s7

# Cleanup: free the forests for s1-s4 now that their predictions exist.
rm(rf.t3.united.s1)
rm(rf.t3.united.s2)
rm(rf.t3.united.s3)
rm(rf.t3.united.s4)

# Cleanup: drop the Task 3 prediction objects for s1-s3. The middle line used
# to remove pr.t2.united.s2, a Task 2 object, instead of pr.t3.united.s2.
# The collection section further down reads pr.t3.united.sX, so skip the line
# for any step whose predictions have not been written to its file yet.
rm(pr.t3.united.s1)
rm(pr.t3.united.s2)
rm(pr.t3.united.s3)




#####################
#####################
#####################
#######
#######
#####################
#####################
#####################
##############
##############
##############
#####################
#####################
#####################

# NA-handling test: attach forest predictions to copies of the Task 2
# prediction set and compare them by MSE, for African rows only.
test <- views.ged.pred.t2
test2 <- views.ged.pred.t2

test3 <- views.ged.pred.t2
test4 <- views.ged.pred.t2


# NOTE(review): the "without" and "with" columns are filled from the same
# object - presumably the forest is re-run with a different na.action between
# the two lines; confirm.
test$pr.s2.without.Imp <- pr.t2.united.s2[["predicted"]]
test$pr.s2.with.Imp <- pr.t2.united.s2[["predicted"]]

test3$pr.s7.without.Imp <- pr.t2.united.s7[["predicted"]]
test3$pr.s7.with.Imp <- pr.t2.united.s7[["predicted"]]


# Restrict each copy to the rows flagged in_africa == 1.
test.africa <- test %>% filter(in_africa == 1)
test.africa2 <- test2 %>% filter(in_africa == 1)
test.africa3 <- test3 %>% filter(in_africa == 1)

# Observed s2 outcome against each s2 prediction column.
MSE(test.africa$ln_ged_best_sb_s2, test.africa$pr.s2.without.Imp)
MSE(test.africa$ln_ged_best_sb_s2, test.africa$pr.s2.with.Imp)

# Observed s7 outcome against each s7 prediction column.
MSE(test.africa3$ln_ged_best_sb_s7, test.africa3$pr.s7.without.Imp)
MSE(test.africa3$ln_ged_best_sb_s7, test.africa3$pr.s7.with.Imp)

# Difference between the two s7 prediction columns themselves.
MSE(test3$pr.s7.with.Imp, test3$pr.s7.without.Imp)

# NOTE(review): test2 receives no prediction columns above, while the file
# name refers to S7 - possibly test3 was meant; confirm.
write.csv(test2, file = "NA_Testing_T2_S7.csv", row.names = FALSE)
























### RF Task 1 - True 2020-21 predictions #####
# One forest of 350 trees per step s2 to s7. The predictors are columns 5:92
# of ged.learn.true; the step's outcome column (94 for s2 up to 99 for s7) is
# appended as the response. Missing values are imputed.

### True S2 - October 2020
rf.t1.united.s2 <- rfsrc(
  ln_ged_best_sb_s2 ~ .,
  ged.learn.true[, c(5:92, 94)],
  ntree = 350,
  na.action = "na.impute"
)

### True S3 - Oct & Nov 2020
rf.t1.united.s3 <- rfsrc(
  ln_ged_best_sb_s3 ~ .,
  ged.learn.true[, c(5:92, 95)],
  ntree = 350,
  na.action = "na.impute"
)

### True S4 - Oct & Nov & Dec 2020
rf.t1.united.s4 <- rfsrc(
  ln_ged_best_sb_s4 ~ .,
  ged.learn.true[, c(5:92, 96)],
  ntree = 350,
  na.action = "na.impute"
)

### True S5 - Oct & Nov & Dec 2020 & Jan 2021
rf.t1.united.s5 <- rfsrc(
  ln_ged_best_sb_s5 ~ .,
  ged.learn.true[, c(5:92, 97)],
  ntree = 350,
  na.action = "na.impute"
)

### True S6 - Oct & Nov & Dec 2020 & Jan & Feb 2021
rf.t1.united.s6 <- rfsrc(
  ln_ged_best_sb_s6 ~ .,
  ged.learn.true[, c(5:92, 98)],
  ntree = 350,
  na.action = "na.impute"
)

### True S7 - Oct & Nov & Dec 2020 & Jan & Feb & March 2021
rf.t1.united.s7 <- rfsrc(
  ln_ged_best_sb_s7 ~ .,
  ged.learn.true[, c(5:92, 99)],
  ntree = 350,
  na.action = "na.impute"
)

# Inspect the fitted forests.
rf.t1.united.s2
rf.t1.united.s3
rf.t1.united.s4
rf.t1.united.s5
rf.t1.united.s6
rf.t1.united.s7


# Predict on ged.pred.true with each forest.
pr.t1.united.s2 <- predict(rf.t1.united.s2, ged.pred.true, na.action = "na.impute")
pr.t1.united.s3 <- predict(rf.t1.united.s3, ged.pred.true, na.action = "na.impute")
pr.t1.united.s4 <- predict(rf.t1.united.s4, ged.pred.true, na.action = "na.impute")
pr.t1.united.s5 <- predict(rf.t1.united.s5, ged.pred.true, na.action = "na.impute")
pr.t1.united.s6 <- predict(rf.t1.united.s6, ged.pred.true, na.action = "na.impute")
pr.t1.united.s7 <- predict(rf.t1.united.s7, ged.pred.true, na.action = "na.impute")


# Cleanup. The *.ged.* names are not created in the part of the script
# visible here (rm() only warns when an object is missing).
# NOTE(review): the collection section further down reads pr.t1.united.sX;
# removing s2-s5 here makes those steps fail unless they are collected first.
rm(pr.t1.ged.s3)
rm(rf.t1.ged.s3)
rm(pr.t1.ged.s4)
rm(rf.t1.ged.s4)
rm(rf.t1.united.s2)
rm(rf.t1.united.s3)
rm(pr.t2.united.s1)
rm(pr.t1.united.s2)

rm(pr.t1.united.s3)
rm(pr.t1.united.s4)
rm(pr.t1.united.s5)

rm(rf.t1.united.s4)
rm(rf.t1.united.s5)

##### XG Boost prediction #####
## Scaling for XGBoost ####
# Each sub-section below assigns views.learn.xgb / views.pred.xgb again, so
# the sub-section that was run last decides what the XGBoost steps receive.
rm(views.learn.xgb)

# NOTE(review): only columns 7:48 are scaled here, while the matrices built
# further down use columns 5:92 - confirm this is intended.
views.xgb <- views.ged
views.xgb[, 7:48] <- scale(views.xgb[, 7:48])

views.learn.xgb <- views.xgb %>%
  filter(month_id >= 114, month_id <= 442)
views.pred.xgb <- views.xgb %>%
  filter(month_id >= 443, month_id <= 478)
#####


### s2 - month 490 ####
views.learn.xgb <- views.xgb %>%
  filter(month_id >= 114, month_id <= 486)
views.pred.xgb <- views.xgb %>%
  filter(month_id == 488)

views.learn.xgb <- na.omit(views.learn.xgb)
views.pred.xgb <- na.omit(views.pred.xgb)
#####

#### NA omit for Task 1 ####
# Learning rows with any missing value are dropped; the prediction set is
# taken over unchanged.
views.learn.xgb <- na.omit(ged.learn.true)
views.pred.xgb <- ged.pred.true


## STACKING
#### NA omit for Task 2 ####
# Here both the learning and the prediction set lose their incomplete rows.
views.learn.xgb <- na.omit(views.ged.learn.t2)
views.pred.xgb <- na.omit(views.ged.pred.t2)

#### NA omit for Task 3 ####
# Learning rows with any missing value are dropped; the prediction set is
# taken over unchanged.
views.learn.xgb <- na.omit(views.ged.learn.t3)
views.pred.xgb <- views.ged.pred.t3

#####

### careful all models merge here
#### prepare data for XGBoost by setting label and train as matrix
#*'!!!'*# ADJUST S-X Variable!
# Features are columns 5:92. The label is column 95, the column paired with
# ln_ged_best_sb_s3 in the forest calls above; change the index for other
# steps.

train.data.xgb <- as.matrix(views.learn.xgb[, 5:92])
train.label.xgb <- as.matrix(views.learn.xgb[, 95])
test.data.xgb <- as.matrix(views.pred.xgb[, 5:92])
test.label.xgb <- as.matrix(views.pred.xgb[, 95])



### XGBoost train ####
# All four fits below read the same train.data.xgb / train.label.xgb and the
# same settings; which task they belong to depends on the data-preparation
# sub-section that was run beforehand.
# NOTE(review): newer xgboost releases report "reg:linear" as deprecated in
# favour of "reg:squarederror" - confirm against the installed version.
xgb.001 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:linear",
  eval_metric = "rmse",
  max.depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)

### TASK 1 XGBoost Train ####
xgb.t1 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:linear",
  eval_metric = "rmse",
  max.depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)

### Task 2 XGBoost Train ####
# Unlike the other fits, this one does not set nthread.
rm(xgb.t2)
xgb.t2 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:linear",
  eval_metric = "rmse",
  max.depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5
)

### Task 3 XGBoost Train ####
xgb.t3 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:linear",
  eval_metric = "rmse",
  max.depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)


##### rename xgb object ####
# Store the Task 2 model under the Task 1 name, then drop the Task 2 name.
# After this, xgb.t2 no longer exists until it is trained again.
xgb.t1 <- xgb.t2
rm(xgb.t2)

## Test #####
# Feature importance and predictions of the trial model, with the observed
# label attached for comparison.
importance.xgb.001 <- xgb.importance(model = xgb.001)

xgb.pre.001 <- predict(xgb.001, test.data.xgb, reshape = TRUE)
xgb.pre.001.frame <- as.data.frame(xgb.pre.001)
xgb.pre.001.frame$real <- test.label.xgb




## Task 1 #####
# Importance table is written to disk; predictions are kept as a data frame
# whose single column is named after the prediction vector.
importance.xgb.t1.s3 <- xgb.importance(model = xgb.t1)
write.csv(importance.xgb.t1.s3, file = "importance_task1_s3_ged.csv", row.names = FALSE)

xgb.pre.t1 <- predict(xgb.t1, test.data.xgb, reshape = TRUE)
xgb.pre.t1.frame <- as.data.frame(xgb.pre.t1)


## Task 2 #####
imp.xgb.t2.s1.united <- xgb.importance(model = xgb.t2)
write.csv(imp.xgb.t2.s1.united, file = "importance_task2_s1_united.csv", row.names = FALSE)

xgb.pre.t2 <- predict(xgb.t2, test.data.xgb, reshape = TRUE)
xgb.pre.t2.frame <- as.data.frame(xgb.pre.t2)

## Task 3 #####
imp.xgb.t3.s1.united <- xgb.importance(model = xgb.t3)
write.csv(imp.xgb.t3.s1.united, file = "importance_task3_s1_united.csv", row.names = FALSE)

xgb.pre.t3 <- predict(xgb.t3, test.data.xgb, reshape = TRUE)
xgb.pre.t3.frame <- as.data.frame(xgb.pre.t3)

#### Collect Prediction Results in Dataframe #####
# Combine the trial forest and trial XGBoost predictions with identifiers and
# the observed outcome, write them out and compare them by MSE.

Pred.Ensemble.GED <- as.data.frame(prediction.ged.01[["predicted"]])
colnames(Pred.Ensemble.GED) <- "RF.01"

Pred.Ensemble.GED$XGBoost.01 <- xgb.pre.001.frame$xgb.pre.001

# RF.pred.compare2$real.sb <- views.pred.small$ln_ged_best_sb

# The stored month is the prediction row's month shifted by 2.
Pred.Ensemble.GED$month_id <- views.ged.pred$month_id + 2
Pred.Ensemble.GED$country_id <- views.ged.pred$country_id
Pred.Ensemble.GED$country_name <- views.ged.pred$country_name
Pred.Ensemble.GED$real <- prediction.ged.01[["yvar"]]

write.csv(Pred.Ensemble.GED, file = "Pred_Ensemble_GED.csv", row.names = FALSE)


## MSE test
MSE(Pred.Ensemble.GED$RF.01, Pred.Ensemble.GED$real)
MSE(Pred.Ensemble.GED$XGBoost.01, Pred.Ensemble.GED$real)

## sync test
MSE(Pred.Ensemble.GED$XGBoost.01, Pred.Ensemble.GED$RF.01)

#####

#### Collect Prediction Results in Dataframe for Task 2: 2017-2019 predictions #####


#####################
#####################
#####################
#######
#######
#####################
#####################
#####################


#### Collect Prediction Results in Dataframe for Task 2: 2017-2019 predictions #####


## Ensemble stacking
# Build a frame of the united forest and XGBoost s3 predictions, join it onto
# the loaded stacking table, attach the observed s3 outcome and write the
# intermediate and final tables.

# The expression inside as.data.frame() becomes the column name that the
# rename() call below looks up, so it must stay exactly as written.
Ensemble.Stacking.Task2.s3.united <- as.data.frame(pr.t2.united.s3[["predicted"]])

# The stored month is the prediction row's month shifted by 3.
Ensemble.Stacking.Task2.s3.united$month_id <- views.pred.xgb$month_id + 3
Ensemble.Stacking.Task2.s3.united$country_id <- views.pred.xgb$country_id

Ensemble.Stacking.Task2.s3.united$XGB.United.t2.s3 <- xgb.pre.t2.frame$xgb.pre.t2

Ensemble.Stacking.Task2.s3.V2 <- left_join(
  Ensemble.Stacking.Task2.s3,
  Ensemble.Stacking.Task2.s3.united,
  by = c("month_id", "country_id")
)



write.csv(Ensemble.Stacking.Task2.s3.V2, file = "Ensemble_Stacking_Task2_s3_V2_series3.csv", row.names = FALSE)


# Give the forest prediction column a readable name.
Ensemble.Stacking.Task2.s3 <- rename(
  Ensemble.Stacking.Task2.s3.V2,
  c('RF.United.t2.s3' = 'pr.t2.united.s3[["predicted"]]')
)

# Columns 2, 3 and 95 of the prediction set supply the join keys and the
# observed outcome.
# NOTE(review): month_id here is not shifted by 3, unlike the month_id stored
# with the predictions above - confirm the join lines up the intended months.
merge.true.t2.s3 <- views.ged.pred.t2[, c(2:3, 95)]
Ensemble.Stacking.Task2.s3.V3 <- left_join(
  Ensemble.Stacking.Task2.s3,
  merge.true.t2.s3,
  by = c("month_id", "country_id")
)

# Reorder the columns by position for the final file.
Ensemble.Stacking.Task2.s3.V4 <- Ensemble.Stacking.Task2.s3.V3[, c(3, 4, 5, 8, 1, 2, 6, 7)]

write.csv(Ensemble.Stacking.Task2.s3.V4, file = "Ensemble_Stacking_Task2_s3_V3_series3.csv", row.names = FALSE)



#+#+#+#


# For each step: re-read the step's ensemble file, add the united forest and
# XGBoost prediction columns, and write the file back under the same name.
# Every step takes its XGBoost column from the same xgb.pre.t2.frame, so that
# frame has to hold the predictions of the step being collected.

## S-1
Pred.Ensemble.Task2.s1 <- read.csv("Pred_Ensemble_Task2_s1.csv", header = TRUE)

Pred.Ensemble.Task2.s1$RF.United.t2.s1 <- pr.t2.united.s1[["predicted"]]
Pred.Ensemble.Task2.s1$XGB.United.t2.s1 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s1, file = "Pred_Ensemble_Task2_s1.csv", row.names = FALSE)


## s-2
Pred.Ensemble.Task2.s2 <- read.csv("Pred_Ensemble_Task2_s2.csv", header = TRUE)

Pred.Ensemble.Task2.s2$RF.United.t2.s2 <- pr.t2.united.s2[["predicted"]]
Pred.Ensemble.Task2.s2$XGB.United.t2.s2 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s2, file = "Pred_Ensemble_Task2_s2.csv", row.names = FALSE)


## s-3
Pred.Ensemble.Task2.s3 <- read.csv("Pred_Ensemble_Task2_s3.csv", header = TRUE)

Pred.Ensemble.Task2.s3$RF.United.t2.s3 <- pr.t2.united.s3[["predicted"]]
Pred.Ensemble.Task2.s3$XGB.United.t2.s3 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s3, file = "Pred_Ensemble_Task2_s3.csv", row.names = FALSE)


## s-4
Pred.Ensemble.Task2.s4 <- read.csv("Pred_Ensemble_Task2_s4.csv", header = TRUE)

Pred.Ensemble.Task2.s4$RF.United.t2.s4 <- pr.t2.united.s4[["predicted"]]
Pred.Ensemble.Task2.s4$XGB.United.t2.s4 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s4, file = "Pred_Ensemble_Task2_s4.csv", row.names = FALSE)


## s-5
Pred.Ensemble.Task2.s5 <- read.csv("Pred_Ensemble_Task2_s5.csv", header = TRUE)

Pred.Ensemble.Task2.s5$RF.United.t2.s5 <- pr.t2.united.s5[["predicted"]]
Pred.Ensemble.Task2.s5$XGB.United.t2.s5 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s5, file = "Pred_Ensemble_Task2_s5.csv", row.names = FALSE)


## s-6
Pred.Ensemble.Task2.s6 <- read.csv("Pred_Ensemble_Task2_s6.csv", header = TRUE)

Pred.Ensemble.Task2.s6$RF.United.t2.s6 <- pr.t2.united.s6[["predicted"]]
Pred.Ensemble.Task2.s6$XGB.United.t2.s6 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s6, file = "Pred_Ensemble_Task2_s6.csv", row.names = FALSE)


## s-7
Pred.Ensemble.Task2.s7 <- read.csv("Pred_Ensemble_Task2_s7.csv", header = TRUE)

# Only this step also (re)writes the identifier columns; the stored month is
# the prediction row's month shifted by 7.
Pred.Ensemble.Task2.s7$month_id <- views.ged.pred.t2$month_id + 7
Pred.Ensemble.Task2.s7$country_id <- views.ged.pred.t2$country_id
Pred.Ensemble.Task2.s7$country_name <- views.ged.pred.t2$country_name

Pred.Ensemble.Task2.s7$RF.United.t2.s7 <- pr.t2.united.s7[["predicted"]]
Pred.Ensemble.Task2.s7$XGB.United.t2.s7 <- xgb.pre.t2.frame$xgb.pre.t2

write.csv(Pred.Ensemble.Task2.s7, file = "Pred_Ensemble_Task2_s7.csv", row.names = FALSE)



##check MSE between XGB & RF - Adjust all (4x) sX
MSE(Pred.Ensemble.Task2.s1$RF.United.t2.s1, Pred.Ensemble.Task2.s1$XGB.United.t2.s1)
#+#


#### Collect Prediction Results in Dataframe for Task 3: 2014-2016 predictions #####

# For each step: re-read the step's ensemble file, add the united forest and
# XGBoost prediction columns, and write the file back under the same name.
# Every step takes its XGBoost column from the same xgb.pre.t3.frame, so that
# frame has to hold the predictions of the step being collected.

## S-1
Pred.Ensemble.Task3.s1 <- read.csv("Pred_Ensemble_Task3_s1.csv", header = TRUE)

Pred.Ensemble.Task3.s1$RF.United.t3.s1 <- pr.t3.united.s1[["predicted"]]
Pred.Ensemble.Task3.s1$XGB.United.t3.s1 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s1, file = "Pred_Ensemble_Task3_s1.csv", row.names = FALSE)


## S-2
Pred.Ensemble.Task3.s2 <- read.csv("Pred_Ensemble_Task3_s2.csv", header = TRUE)

Pred.Ensemble.Task3.s2$RF.United.t3.s2 <- pr.t3.united.s2[["predicted"]]
Pred.Ensemble.Task3.s2$XGB.United.t3.s2 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s2, file = "Pred_Ensemble_Task3_s2.csv", row.names = FALSE)


## S-3
Pred.Ensemble.Task3.s3 <- read.csv("Pred_Ensemble_Task3_s3.csv", header = TRUE)

Pred.Ensemble.Task3.s3$RF.United.t3.s3 <- pr.t3.united.s3[["predicted"]]
Pred.Ensemble.Task3.s3$XGB.United.t3.s3 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s3, file = "Pred_Ensemble_Task3_s3.csv", row.names = FALSE)


## S-4
Pred.Ensemble.Task3.s4 <- read.csv("Pred_Ensemble_Task3_s4.csv", header = TRUE)

Pred.Ensemble.Task3.s4$RF.United.t3.s4 <- pr.t3.united.s4[["predicted"]]
Pred.Ensemble.Task3.s4$XGB.United.t3.s4 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s4, file = "Pred_Ensemble_Task3_s4.csv", row.names = FALSE)


## S-5
Pred.Ensemble.Task3.s5 <- read.csv("Pred_Ensemble_Task3_s5.csv", header = TRUE)

Pred.Ensemble.Task3.s5$RF.United.t3.s5 <- pr.t3.united.s5[["predicted"]]
Pred.Ensemble.Task3.s5$XGB.United.t3.s5 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s5, file = "Pred_Ensemble_Task3_s5.csv", row.names = FALSE)


## S-6
Pred.Ensemble.Task3.s6 <- read.csv("Pred_Ensemble_Task3_s6.csv", header = TRUE)

Pred.Ensemble.Task3.s6$RF.United.t3.s6 <- pr.t3.united.s6[["predicted"]]
Pred.Ensemble.Task3.s6$XGB.United.t3.s6 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s6, file = "Pred_Ensemble_Task3_s6.csv", row.names = FALSE)

## S-7
Pred.Ensemble.Task3.s7 <- read.csv("Pred_Ensemble_Task3_s7.csv", header = TRUE)

Pred.Ensemble.Task3.s7$RF.United.t3.s7 <- pr.t3.united.s7[["predicted"]]
Pred.Ensemble.Task3.s7$XGB.United.t3.s7 <- xgb.pre.t3.frame$xgb.pre.t3

write.csv(Pred.Ensemble.Task3.s7, file = "Pred_Ensemble_Task3_s7.csv", row.names = FALSE)




#### Collect Prediction Results in Dataframe for Task 1: 2020-21 predictions #####

# For each step: re-read the step's ensemble file, add the united forest and
# XGBoost prediction columns, and write the file back under the same name.
# Every step takes its XGBoost column from the same xgb.pre.t1.frame, so that
# frame has to hold the predictions of the step being collected.

## s-2
Pred.Ensemble.Task1.s2 <- read.csv("Pred_Ensemble_Task1_s2.csv", header = TRUE)

Pred.Ensemble.Task1.s2$RF.United.t1.s2 <- pr.t1.united.s2[["predicted"]]
Pred.Ensemble.Task1.s2$XGB.United.t1.s2 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s2, file = "Pred_Ensemble_Task1_s2.csv", row.names = FALSE)

## s-3
Pred.Ensemble.Task1.s3 <- read.csv("Pred_Ensemble_Task1_s3.csv", header = TRUE)

Pred.Ensemble.Task1.s3$RF.United.t1.s3 <- pr.t1.united.s3[["predicted"]]
Pred.Ensemble.Task1.s3$XGB.United.t1.s3 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s3, file = "Pred_Ensemble_Task1_s3.csv", row.names = FALSE)


## s-4
Pred.Ensemble.Task1.s4 <- read.csv("Pred_Ensemble_Task1_s4.csv", header = TRUE)

Pred.Ensemble.Task1.s4$RF.United.t1.s4 <- pr.t1.united.s4[["predicted"]]
Pred.Ensemble.Task1.s4$XGB.United.t1.s4 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s4, file = "Pred_Ensemble_Task1_s4.csv", row.names = FALSE)


## s-5
Pred.Ensemble.Task1.s5 <- read.csv("Pred_Ensemble_Task1_s5.csv", header = TRUE)

Pred.Ensemble.Task1.s5$RF.United.t1.s5 <- pr.t1.united.s5[["predicted"]]
Pred.Ensemble.Task1.s5$XGB.United.t1.s5 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s5, file = "Pred_Ensemble_Task1_s5.csv", row.names = FALSE)


## s-6
Pred.Ensemble.Task1.s6 <- read.csv("Pred_Ensemble_Task1_s6.csv", header = TRUE)

Pred.Ensemble.Task1.s6$RF.United.t1.s6 <- pr.t1.united.s6[["predicted"]]
Pred.Ensemble.Task1.s6$XGB.United.t1.s6 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s6, file = "Pred_Ensemble_Task1_s6.csv", row.names = FALSE)


## s-7
Pred.Ensemble.Task1.s7 <- read.csv("Pred_Ensemble_Task1_s7.csv", header = TRUE)

Pred.Ensemble.Task1.s7$RF.United.t1.s7 <- pr.t1.united.s7[["predicted"]]
Pred.Ensemble.Task1.s7$XGB.United.t1.s7 <- xgb.pre.t1.frame$xgb.pre.t1

write.csv(Pred.Ensemble.Task1.s7, file = "Pred_Ensemble_Task1_s7.csv", row.names = FALSE)



## old setup s-2
# Earlier way of collecting the October 2020 (s2) forecasts.
# NOTE(review): pred.s2.true, xgb.pre.s2.true.frame, ged.pred.s2.monthid.490
# and Pred.Ensemble.November2020 are not created in the part of the script
# visible here - presumably objects from an older session; confirm.
Pred.Ensemble.October2020 <- as.data.frame(pred.s2.true[["predicted"]])
colnames(Pred.Ensemble.October2020) <- c("RF.GED.s2.true")

Pred.Ensemble.October2020$XGBoost.GED.s2.true <- xgb.pre.s2.true.frame$xgb.pre.s2.true

# The file is written before the identifier columns below are added.
write.csv(Pred.Ensemble.October2020, file = "Pred_Ensemble_Oct2020.csv", row.names = FALSE)


####
# The stored month is the source row's month shifted by 2.
Pred.Ensemble.October2020$month_id <- ged.pred.s2.monthid.490$month_id + 2
Pred.Ensemble.October2020$country_id <- ged.pred.s2.monthid.490$country_id
Pred.Ensemble.October2020$country_name <- ged.pred.s2.monthid.490$country_name

#reorder by column index
# NOTE(review): `data` is never assigned in the visible script, so this looks
# like a template line - replace `data` with the frame to reorder.
data <- data[c(1, 3, 2)]

####
# The comparison needs `==`. The former `month_id=490` passed a named
# argument, which filter() rejects with an error.
obj <- filter(Pred.Ensemble.November2020, month_id == 490)
# left_join obj

# Agreement checks between model columns.
# NOTE(review): the *.monthid.490 and *.GED.t1.* column names in the first
# three checks are not created in the visible script - confirm they exist in
# the loaded files.
MSE(Pred.Ensemble.October2020$RF.GED.s2.monthid.490, Pred.Ensemble.October2020$XGBoost.GED.s2.monthid.490)
MSE(Pred.Ensemble.Task1.s3$XGB.GED.t1.s3, Pred.Ensemble.Task1.s3$RF.GED.t1.s3)
MSE(Pred.Ensemble.Task1.s7$XGB.GED.t1.s7, Pred.Ensemble.Task1.s7$RF.GED.t1.s7)

MSE(Pred.Ensemble.Task1.s5$XGB.United.t1.s5, Pred.Ensemble.Task1.s5$RF.United.t1.s5)
MSE(Pred.Ensemble.Task1.s6$XGB.United.t1.s6, Pred.Ensemble.Task1.s6$RF.United.t1.s6)
MSE(Pred.Ensemble.Task1.s7$XGB.United.t1.s7, Pred.Ensemble.Task1.s7$RF.United.t1.s7)

MSE(Pred.Ensemble.Task1.s2$XGB.United.t1.s2, Pred.Ensemble.Task1.s2$RF.United.t1.s2)
MSE(Pred.Ensemble.Task1.s2$XGB.United.t1.s2, Pred.Ensemble.Task1.s2$XGBoost.UNITED.s2.monthid.490)